//-----------------------------------------------------------------------
//
// Gentrification and pioneer businesses
// Kristian Behrens, Brahim Boualam, Julien Martin, and Florian Mayneris
//
// First version: 02/05/2017
// This  version: 18/11/2021
//
// Preparation of the NHGIS census variables for Philadelphia and Boston
//
//-----------------------------------------------------------------------

/*
This program provides :
	 - census data at the block level, or block group level, for Boston, Phil. and Detroit
	 
	 Input: NHGIS data
	 The census files are too big to be uploaded in the dropbox. 
	 
	 Output: - block_group_boston.dta, block_group_phil.dta, block_group_detroit.dta
			 - block_boston.dta, block_phil.dta, block_detroit.dta
*/


cd "/Users/kristianbehrens/Desktop/GENTRI_RESTAT_FINAL/"

*********************************************************************
******************** HISPANIC
********************************************************************* 


import delimited "data/census/nhgis_philadelphia_boston_block/hispanic/nhgis0030_ds120_1990_block.csv", encoding(ISO-8859-1) clear
rename eu0001 hispanic
keep gisjoin year hispanic
compress
save "temp/hispanic_90.dta", replace

clear 
import delimited "data/census/nhgis_philadelphia_boston_block/hispanic/nhgis0030_ds147_2000_block.csv", encoding(ISO-8859-1) clear
rename fxz001 hispanic
keep gisjoin year hispanic
compress
save "temp/hispanic_00.dta", replace

clear 
import delimited "data/census/nhgis_philadelphia_boston_block/hispanic/nhgis0030_ds172_2010_block.csv", encoding(ISO-8859-1) clear
rename h7y003 hispanic
keep gisjoin year hispanic
compress

append using "temp/hispanic_00.dta"
append using "temp/hispanic_90.dta"
save "temp/hispanic.dta", replace



*********************************************************************
******************** BLOCKS
********************************************************************* 

clear
import delimited "data/census/nhgis_philadelphia_boston_block/nhgis0029_ds120_1990_block.csv", encoding(ISO-8859-1) clear

rename et1001 pop
rename euo001 households
rename eux001 male
rename eux002 female
rename euy001 white
rename euy002 black
rename euy003 indian
rename euy004 asian_pacific
rename euy005 otherrace
rename esa001 housing
rename esn001 occupied
rename esn002 vacant
rename es1001 owner_occupied
rename es1002 renter_occupied

drop et*

save "temp/block_90.dta", replace

clear
import delimited "data/census/nhgis_philadelphia_boston_block/nhgis0029_ds147_2000_block.csv", encoding(ISO-8859-1)

rename fxs001 pop
rename fy4001 households
rename fyl001 male
rename fyl002 female
rename fye001 white
rename fye002 black
rename fye003 indian
rename fye004 asian
rename fye005 hawaiian
rename fye006 otherrace
rename fye007 two_other_race
gen asian_pacific= asian + hawaiian 
drop asian
drop hawaiian
rename fv5001  housing
rename fv8001 occupied
rename fv8002 vacant
rename fwa001 owner_occupied
rename fwa002 renter_occupied

drop fym*
tostring blocka, replace 
save "temp/block_00.dta", replace

clear
import delimited "data/census/nhgis_philadelphia_boston_block/nhgis0029_ds172_2010_block.csv", encoding(ISO-8859-1)

rename h7v001 pop
rename h8c001 households
rename h76002 male
rename h76026 female

rename h7x002 white
rename h7x003 black
rename h7x004 indian
rename h7x005 asian
rename h7x006 hawaiian
rename h7x007 otherrace
rename h7x008 two_other_race
gen asian_pacific= asian + hawaiian 
drop asian
drop hawaiian

*rename h77001 median_age

rename ifc001 housing
rename ife002 occupied
rename ife003 vacant
gen owner_occupied=iff002 + iff003
rename iff004 renter_occupied 

drop if* h8*  h7* 
tostring blocka, replace 
save "temp/block_10.dta", replace



*********
* Combine the datasets
********* 

use "temp/block_10.dta", clear
append using "temp/block_00.dta"
append using "temp/block_90.dta", force

merge 1:1 gisjoin year using "temp/hispanic.dta"
drop _m

label var pop "Population block" 
label var  households "Nb. of households"
label var  housing  "Nb. of housing units"
label var  occupied "Nb. of occupied housing units"
label var  vacant "Nb. of vacant housing units "
label var  owner_occupied "Nb. of owner occupied housing units"
label var  renter_occupied "Nb. of renter occupied housing units"
label var male "Nb. of males "
label var female "Nb. of females "
label var white "Nb. of white people"
label var black "Nb. of black people"
label var indian "Nb. of American indian native people"
label var asian_pacific "Nb. of asian and pacific islander people"
label var otherrace "Some other race alone"
label var two_other_race "Two or more races"
label var hispanic "Nb. of people with hispanic origins"

************ Rename variables consistent with NY *******************
rename white pop_white_block
rename black pop_black_block
rename asian pop_asian_block
gen pop_other_block=indian  + otherrace + two_other_race
drop indian  otherrace two_other_race
label var pop_other_block "Pop. other race(s)"
********************************************************************

gen county_id = statea*1000 + countya 

replace cousuba=cty_suba if cousuba==.
replace blkgrpa=blck_grpa if blkgrpa==.

keep gisjoin year state statea county county_id countya tracta blkgrpa blocka pop pop_white_block pop_black_block pop_asian_block pop_other_block male female households housing occupied vacant renter_occupied owner_occupied hispanic

order gisjoin-pop_black_block pop_asian_block pop_other_block

*** 1 BOSTON CBSA 2 PHILADELPHIA CBSA 3 DETROIT CBSA ***

gen city=0
replace city=1 if county_id==9015
replace city=1 if county_id==25001
replace city=1 if county_id==25005
replace city=1 if county_id==25009
replace city=1 if county_id==25017
replace city=1 if county_id==25021
replace city=1 if county_id==25023
replace city=1 if county_id==25025
replace city=1 if county_id==25027
replace city=1 if county_id==33001
replace city=1 if county_id==33011
replace city=1 if county_id==33013
replace city=1 if county_id==33015
replace city=1 if county_id==33017
replace city=1 if county_id==44001
replace city=1 if county_id==44003
replace city=1 if county_id==44005
replace city=1 if county_id==44007
replace city=1 if county_id==44009

replace city=2 if county_id==10001
replace city=2 if county_id==10003
replace city=2 if county_id==24015
replace city=2 if county_id==34001
replace city=2 if county_id==34005
replace city=2 if county_id==34007
replace city=2 if county_id==34009
replace city=2 if county_id==34011
replace city=2 if county_id==34015
replace city=2 if county_id==34033
replace city=2 if county_id==42011
replace city=2 if county_id==42017
replace city=2 if county_id==42029
replace city=2 if county_id==42045
replace city=2 if county_id==42091
replace city=2 if county_id==42101


drop county_id

preserve 
keep if city==1
drop city
compress
saveold "results/Boston/census_variables.dta", replace
restore 

preserve 
keep if city==2
drop city
compress
saveold "results/Philadelphia/census_variables.dta", replace
restore 

 
erase "temp/hispanic_90.dta"
erase "temp/hispanic_00.dta"
erase "temp/hispanic.dta"




*********************************************************************
******************** BLOCK GROUPS
*********************************************************************

* 1990

clear
import delimited "data/census/nhgis_philadelphia_boston_bgroup/nhgis0031_ds120_1990_blck_grp.csv", encoding(ISO-8859-1)

rename et1001 pop
*rename euo001 households
rename ess001 lower_q_value
rename est001 median_value
rename esu001 upper_q_value
rename es5001 lower_q_rent
rename es6001 median_rent
rename es7001 upper_q_rent

save "temp/block_group_1990_1.dta", replace

clear

import delimited "data/census/nhgis_philadelphia_boston_bgroup/nhgis0031_ds123_1990_blck_grp.csv", encoding(ISO-8859-1)

rename e4u001 median_household_income
rename e01001 per_capita_income 

gen pop_25= e33001   + e33002   + e33003   + e33004   + e33005   + e33006   + e33007   
gen no_college= e33001   + e33002   + e33003 
gen some_college = e33004   + e33005   + e33006   + e33007   
gen some_college_share = some_college/pop_25

rename eyu001 median_gross_rent
rename ex8001 median_year_built

merge 1:1 gisjoin using "temp/block_group_1990_1.dta"
drop _m
save "temp/block_group_1990.dta", replace
erase "temp/block_group_1990_1.dta"
clear

*2000 
clear
import delimited "data/census/nhgis_philadelphia_boston_bgroup/nhgis0032_ds147_2000_blck_grp.csv", encoding(ISO-8859-1)
rename fxs001 pop
*rename fy4001 households

save "temp/block_group_2000_1.dta", replace

clear

clear
import delimited "data/census/nhgis_philadelphia_boston_bgroup/nhgis0032_ds152_2000_blck_grp.csv", encoding(ISO-8859-1)
rename hf6001 median_household_income
rename hg4001 per_capita_income
rename g8u001 lower_q_value
rename g8v001 median_value
rename g8w001 upper_q_value
rename g73001 lower_q_rent
rename g74001 median_rent
rename g75001 upper_q_rent
rename g8c001 median_gross_rent
rename g68001 median_year_built

egen pop_25= rowtotal(hd*)

egen no_college_male= rowtotal(hd1001 - hd1009)
egen no_college_woman= rowtotal(hd1017 - hd1025)
gen no_college=  no_college_male+ no_college_woman

egen some_college_male = rowtotal(hd1010 - hd1016)
egen some_college_woman = rowtotal(hd1026- hd1032)
gen some_college=  some_college_male+ some_college_woman

gen some_college_share = some_college/pop_25
*gen no_college_share=no_college/pop_25

drop *_male *_woman

merge 1:1 gisjoin using "temp/block_group_2000_1.dta"
drop _m

save "temp/block_group_2000.dta", replace
erase "temp/block_group_2000_1.dta"
clear

*2010 
clear
import delimited "data/census/nhgis_philadelphia_boston_bgroup/nhgis0033_ds176_20105_2010_blck_grp.csv", encoding(ISO-8859-1)

rename jmae001 pop
 
rename joie001 median_household_income
rename jqbe001 per_capita_income
rename jthe001 lower_q_value
rename jtie001 median_value
rename jtje001 upper_q_value
rename jsye001 lower_q_rent
rename jsze001 median_rent
rename js0e001 upper_q_rent
rename js5e001 median_gross_rent
rename jsee001 median_year_built

gen pop_25= jn9e001

egen no_college_male= rowtotal(jn9e003 - jn9e011)
egen no_college_woman= rowtotal(jn9e020 - jn9e028)
gen no_college=  no_college_male+ no_college_woman

egen some_college_male = rowtotal(jn9e012 - jn9e018)
egen some_college_woman = rowtotal(jn9e029 - jn9e035)
gen some_college=  some_college_male+ some_college_woman

gen some_college_share = some_college/pop_25
*gen no_college_share=no_college/pop_25
*gen total=no_college_share + some_college_share
*tab total
drop *_male *_woman

drop year
gen year=2010

save "temp/block_group_2010.dta", replace

clear
use "temp/block_group_2010.dta"
append using "temp/block_group_2000.dta"
append using "temp/block_group_1990.dta"

replace blkgrpa=blck_grpa if blkgrpa==.

order gisjoin year  
keep gisjoin  year statea state blkgrpa countya county     tracta   pop median_household_income  per_capita_income  lower_q_value median_value upper_q_value lower_q_rent median_rent upper_q_rent some_college* no_college median_gross_rent median_year_built


label var pop "Population block group" 
label var  median_household_income "Median household income"
label var   per_capita_income  "Per capita income"
label var  lower_q_value "Lower housing value quartile"
label var  median_value "Median housing value "
label var   upper_q_value "Upper housing value quartile"
label var lower_q_rent "Lower contract rent quartile"
label var median_rent "Median Contract Rent "
label var upper_q_rent "Upper contract rent quartile"
label var  some_college "Pop 25+ w/ some-college" 
label var  some_college_share "Share pop 25+ w/ some-college"
label var no_college  "Pop 25+ w/ no-college" 
label var median_year_built "Median Year Structure Built"
label var median_gross_rent "Median gross rent"

rename pop pop_block_group
************ Rename variables consistent with NY *******************
rename some_college edu_college_block 
rename median_year_built median_yearb_block
rename median_rent median_crent_block
rename median_gross_rent median_grent_block
rename median_value median_value_block 
rename median_household_income median_income_block
rename per_capita_income percap_income_block
******************************************************************

gen county_id = statea*1000 + countya

order gisjoin year  county_id

*** 1 BOSTON CBSA 2 PHILADELPHIA CBSA 3 DETROIT CBSA ***

gen city=0
replace city=1 if county_id==9015
replace city=1 if county_id==25001
replace city=1 if county_id==25005
replace city=1 if county_id==25009
replace city=1 if county_id==25017
replace city=1 if county_id==25021
replace city=1 if county_id==25023
replace city=1 if county_id==25025
replace city=1 if county_id==25027
replace city=1 if county_id==33001
replace city=1 if county_id==33011
replace city=1 if county_id==33013
replace city=1 if county_id==33015
replace city=1 if county_id==33017
replace city=1 if county_id==44001
replace city=1 if county_id==44003
replace city=1 if county_id==44005
replace city=1 if county_id==44007
replace city=1 if county_id==44009

replace city=2 if county_id==10001
replace city=2 if county_id==10003
replace city=2 if county_id==24015
replace city=2 if county_id==34001
replace city=2 if county_id==34005
replace city=2 if county_id==34007
replace city=2 if county_id==34009
replace city=2 if county_id==34011
replace city=2 if county_id==34015
replace city=2 if county_id==34033
replace city=2 if county_id==42011
replace city=2 if county_id==42017
replace city=2 if county_id==42029
replace city=2 if county_id==42045
replace city=2 if county_id==42091
replace city=2 if county_id==42101


keep gisjoin-pop *_block city

drop county_id
preserve 
keep if city==1
drop city
compress
saveold "results/Boston/census_variables_bg.dta", replace
restore 

preserve 
keep if city==2
drop city
compress
saveold "results/Philadelphia/census_variables_bg.dta", replace
restore 


erase "temp/block_10.dta"
erase "temp/block_00.dta"
erase "temp/block_90.dta"
erase "temp/block_group_2010.dta"
erase "temp/block_group_2000.dta"
erase "temp/block_group_1990.dta"








